home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
The World of Computer Software
/
The World of Computer Software.iso
/
tags18.zip
/
ASMTAG.C
< prev
next >
Wrap
C/C++ Source or Header
|
1991-11-18
|
20KB
|
559 lines
/*
EPSHeader
File: asmtag.c
Author: J. Kercheval
Created: Sun, 07/14/1991 17:25:26
*/
/*
EPSRevision History
J. Kercheval Sun, 07/14/1991 20:25:59 creation
J. Kercheval Mon, 07/15/1991 22:47:30 finish finite state machine parser
J. Kercheval Wed, 07/17/1991 21:35:43 add IsMember() and get_token()
J. Kercheval Thu, 07/18/1991 19:57:34 add flags checking
J. Kercheval Sun, 07/21/1991 15:58:56 add comment block support
J. Kercheval Sat, 07/27/1991 21:16:53 remove public post process support
J. Kercheval Sat, 07/27/1991 22:50:49 performance considerations (+10%)
J. Kercheval Sat, 08/10/1991 17:48:28 speed up IsMember()
J. Kercheval Sat, 08/17/1991 22:50:29 use unique function names (ASM...)
J. Kercheval Sun, 08/25/1991 23:52:51 fix bug in ASMSymbolWanted()
J. Kercheval Thu, 10/03/1991 12:27:37 fix logic outputting local labels
*/
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include "asmtag.h"
#include "tagio.h"
/*
* The finite state machine allows the following interesting paths
*
* 1 - Discard, Parse1, Symbol1
* 2 - Discard, Parse1, Parse2, Symbol2
* 3 - Discard, Parse1, Parse2, Define
*
* all the important cases follow one of these paths according to MASM/TASM
* syntax. The exit state is for finish up routine calls and some paths not
* covered here are simple error paths and probably result from syntax errors
*/
/* states of the line parser driven by ASMTags() */
typedef enum state {
    Discard,                    /* fetch the next input line */
    Parse1,                     /* classify the first token of the line */
    Parse2,                     /* classify the second token of the line */
    Symbol1,                    /* first token was a tagging keyword, the
                                 * name to tag is the token after it */
    Symbol2,                    /* second token was a tagging keyword, the
                                 * name to tag is the token before it */
    Define,                     /* second token was a defining token, the
                                 * name to tag is the token before it */
    Exit                        /* input exhausted, leave the parser */
} State;

/* character which starts a comment running to the end of the line */
#define COMMENT_CHAR ';'

/* storage reserved for each entry of the symbol lists, terminator included */
#define SYMBOL_SIZE 15
/*----------------------------------------------------------------------------
*
* The symbol lists represent all the symbols we are interested in either
* obtaining or ignoring. The order of some of these token lists is
* important for determining if output should be performed. If you want to
* change these lists make sure that the flag checking is altered to match
* the changed order. The first element of each of these symbol lists is a
* string containing all the first characters within the symbol list. This
* allows faster rejection for ASMIsMember() which is called often.
*
---------------------------------------------------------------------------*/
/*
 * Tokens which carry no meaning for this parser; ASM_get_token() skips over
 * them.  Entry 0 is the set of first characters of the entries that follow,
 * and an empty entry ends the list.
 */
char ASM_NOP_Sym[][SYMBOL_SIZE] =
{
    "cpbfnwo",                  /* first characters of the symbols below */

    "c",                        /* C language declaration */
    "pascal",                   /* PASCAL language declaration */
    "basic",                    /* BASIC language declaration */
    "fortran",                  /* FORTRAN language declaration */
    "prolog",                   /* PROLOG language declaration */
    "nolanguage",               /* generic language declaration */
    "windows",                  /* WINDOWS exit and entry modifier */
    "oddnear",                  /* overlay modifier */
    "oddfar",                   /* overlay modifier */
    "normal",                   /* normal procedure entry/exit code */

    ""                          /* end of list */
};
/*
 * Tokens which open a comment block.  Entry 0 is the set of first characters
 * of the entries that follow, and an empty entry ends the list.
 */
char ASM_comment_block[][SYMBOL_SIZE] =
{
    "c",                        /* first characters of the symbols below */

    "comment",                  /* begin comment block, the next non white
                                 * space character is the delimiter */

    ""                          /* end of list */
};
/*
 * Character classification is done with 256 entry lookup tables which are
 * filled in by ASMParserInit().  The argument is converted to unsigned char
 * before it is used as a subscript: the callers pass plain char values taken
 * from the input line, and where plain char is signed a character above 127
 * would otherwise produce a negative subscript and read outside the table.
 */

/* create the function for determining if a character is a delimiter */
#define IsDelim(c) ( _ASM_delim_table[(unsigned char) (c)] )

/* the indexed table for delimiter character lookup */
BOOLEAN _ASM_delim_table[256];

/* valid delimiters for this syntax */
char ASM_delim[] = " \t;:=.,\"()<>[]*-+/";

/* create the function for determining if a character is a whitespace */
#define IsWhite(c) ( _ASM_white_table[(unsigned char) (c)] )

/* the indexed table for white space character lookup */
BOOLEAN _ASM_white_table[256];

/* whitespace characters */
char ASM_white[] = " \t\v\f";

/* symbols which are both delimiters and special tokens; they are special
   tokens only when found at the beginning of a string of 1 or more
   delimiters */
char ASM_delim_Sym[] = "=:";
/*
 * Defining tokens: when one of these is the second token on a line the
 * parser enters the Define state and the first token is the tagged symbol.
 * Entry 0 is the set of first characters of the entries that follow, and an
 * empty entry ends the list.
 *
 * The Define state depends on the token ":" being at index 1 in this list.
 */
char ASM_def[][SYMBOL_SIZE] =
{
    ":e=cd",                    /* first characters of the symbols below */

    ":",                        /* local labels */
    "equ",                      /* equivalence */
    "=",                        /* equivalence */
    "catstr",                   /* concatenated and named strings */
    "db",                       /* named byte data definition */
    "dw",                       /* named word data definition */
    "dd",                       /* named double word data definition */
    "dp",                       /* named 6 byte far pointer data area
                                 * definition */
    "df",                       /* named 6 byte far pointer definition */
    "dq",                       /* named quad word data definition */
    "dt",                       /* named 10 byte data area */

    ""                          /* end of list */
};
/*
 * Tagging keywords: finding one of these sends the parser to the Symbol1 or
 * Symbol2 state.  The index of an entry is what ASMSymbolWanted() maps onto
 * the output flags, so the order matters.  Entry 0 is the set of first
 * characters of the entries that follow, and an empty entry ends the list.
 */
char ASM_sym[][SYMBOL_SIZE] =
{
    "pmlsu",                    /* first characters of the symbols below */

    "proc",                     /* procedures */
    "macro",                    /* macros */
    "label",                    /* local labels */
    "struc",                    /* structures */
    "union",                    /* unions */

    ""                          /* end of list */
};
/*----------------------------------------------------------------------------
 *
 * ASMParserInit() initializes the tables required by the parser.  Each table
 * is a simple boolean index which is TRUE if the character corresponding to
 * the index is a member of the associated set:
 *
 *      _ASM_delim_table    characters of ASM_delim plus the string terminator
 *      _ASM_white_table    characters of ASM_white
 *
 * Takes no arguments and returns nothing.  Both tables are rebuilt from
 * scratch on every call.
 *
 ---------------------------------------------------------------------------*/
void ASMParserInit(void)
{
    const char *s;
    int i;

    /* init the entire block to FALSE */
    for (i = 0; i < 256; i++) {
        _ASM_delim_table[i] = FALSE;
        _ASM_white_table[i] = FALSE;
    }

    /* set the characters in the delim set to TRUE; the subscript goes
     * through unsigned char so a character above 127 in the set could never
     * index below the start of the table */
    for (s = ASM_delim; *s; s++) {
        _ASM_delim_table[(unsigned char) *s] = TRUE;
    }

    /* the string terminator is also a delimiter, this is what stops the
     * token scan at the end of a line */
    _ASM_delim_table['\0'] = TRUE;

    /* set the characters in the white set to TRUE */
    for (s = ASM_white; *s; s++) {
        _ASM_white_table[(unsigned char) *s] = TRUE;
    }
}
/*----------------------------------------------------------------------------
 *
 * ASMSymbolWanted() tells whether the tagging keyword found at position
 * index of the ASM_sym list has been requested through flags.  The value of
 * the matching flag is returned, any other index yields FALSE:
 *
 *      Index   Symbol      Flag
 *      -----   -------     ---------
 *        1     "proc"      flags->af
 *        2     "macro"     flags->am
 *        3     "label"     flags->al
 *        4     "struc"     flags->as
 *        5     "union"     flags->au
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMSymbolWanted(Flags * flags, int index)
{
    if (index == 1) {
        return flags->af;
    }
    if (index == 2) {
        return flags->am;
    }
    if (index == 3) {
        return flags->al;
    }
    if (index == 4) {
        return flags->as;
    }
    if (index == 5) {
        return flags->au;
    }

    /* not an index of the symbol list */
    return FALSE;
}
/*----------------------------------------------------------------------------
 *
 * ASMIsMember() checks the token passed for membership in token_list and
 * returns TRUE if it is a member and FALSE otherwise.  The comparison
 * ignores case.
 *
 *      token_list  entry 0 holds the first characters of all the symbols in
 *                  the list, the symbols start at entry 1 and an empty
 *                  entry ends the list
 *      token       the nul terminated token to look for
 *      index       receives the position of the symbol in token_list when
 *                  the return value is TRUE, it is left untouched when the
 *                  return value is FALSE
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASMIsMember(char token_list[][SYMBOL_SIZE], char *token, int *index)
{
    int i;

    /* an empty token is never a member; without this test it would slip
     * past the quick rejection below since strchr() also matches the
     * terminator of the first character list */
    if (token[0] == '\0') {
        return FALSE;
    }

    /* quick rejection on the first character; tolower() is only defined for
     * values representable as unsigned char (or EOF), hence the cast */
    if (!strchr(token_list[0], tolower((unsigned char) token[0]))) {
        return FALSE;
    }

    /* march through array until membership is determined */
    for (i = 1; token_list[i][0] != '\0'; i++) {

        /* stricmp() is the case insensitive compare of the DOS compilers;
         * it is not part of standard C */
        if (!stricmp(token, token_list[i])) {
            *index = i;
            return TRUE;
        }
    }

    /* did not find it */
    return FALSE;
}
/*----------------------------------------------------------------------------
 *
 * ASM_get_token() extracts the next significant token from the line that
 * *lptr points into, copies it nul terminated into token and leaves *lptr
 * just past it.  Tokens found in ASM_NOP_Sym are skipped.
 *
 * Returns FALSE, with the contents of token unspecified, when the end of the
 * line or a comment character is the first thing found after white space;
 * returns TRUE otherwise.
 *
 * A character of ASM_delim_Sym forms a one character token of its own.  Any
 * other delimiter at the scan position yields an empty token and *lptr is
 * not moved past it.
 *
 ---------------------------------------------------------------------------*/
BOOLEAN ASM_get_token(char **lptr, char *token)
{
    char *start;                /* where the current token begins */
    int length;                 /* number of characters in the token */
    int unused;                 /* index reported by ASMIsMember(), ignored */

    for (;;) {

        /* step over leading white space */
        while (IsWhite(**lptr)) {
            ++*lptr;
        }

        /* nothing left on the line, or only a comment */
        if (**lptr == '\0' || **lptr == COMMENT_CHAR) {
            return FALSE;
        }

        if (strchr(ASM_delim_Sym, **lptr) != NULL) {

            /* a delimiter which is a token in its own right */
            token[0] = **lptr;
            token[1] = '\0';
            ++*lptr;
        }
        else {

            /* everything up to the next delimiter is the token */
            start = *lptr;
            while (!IsDelim(**lptr)) {
                ++*lptr;
            }
            length = *lptr - start;
            strncpy(token, start, length);
            token[length] = '\0';
        }

        /* hand the token back unless it is one of the ignored symbols */
        if (!ASMIsMember(ASM_NOP_Sym, token, &unused)) {
            return TRUE;
        }
    }
}
/*----------------------------------------------------------------------------
 *
 * ASMTags() tags an input stream assuming input format of ASM 80x86 format
 * in MASM/TASM syntax
 *
 *      infile      stream the source lines are read from through GetLine()
 *      infname     name of the input file, handed unchanged to OutputTag()
 *      outfile     stream handed to OutputTag() for every tag found
 *      flags       selects what is output: flags->ad for the defining
 *                  tokens of ASM_def other than ":", flags->al for ":"
 *                  labels, and the flags tested by ASMSymbolWanted() for
 *                  the keywords of ASM_sym; also handed to OutputTag()
 *
 * At most the first two significant tokens of a line are examined:
 *
 *      keyword name        the token after the keyword is tagged
 *      name keyword        the token before the keyword is tagged
 *      name definer        the token before the defining token is tagged
 *
 * A line whose first token is in ASM_comment_block opens a comment block;
 * the next non white space character is its delimiter and input is skipped
 * up to the next occurrence of that character, the rest of that line being
 * ignored.  If no delimiter follows on the same line, parsing continues
 * normally with the next line.
 *
 * Every tag is reported with its line number, counted from 1, and the
 * offset of its first character in the file, counted from 0.  The offset
 * is computed on the assumption that every line read is followed by exactly
 * one terminating character.
 *
 * GetLine() and OutputTag() are not defined in this file; GetLine() is
 * taken to return nonzero while a line could be read.
 *
 ---------------------------------------------------------------------------*/
#define TOKEN_LINE_LENGTH 256

/*
 * ASM_next_line() reads the next line of infile into line, a buffer of
 * TOKEN_LINE_LENGTH characters, and on success moves the position counters
 * on to it.  Returns nonzero if a line was read and zero otherwise, in which
 * case the counters are left alone.
 */
static int ASM_next_line(FILE * infile, char *line, long *line_number,
                         int *line_length, long *char_number)
{
    if (!GetLine(infile, line, TOKEN_LINE_LENGTH)) {
        return 0;
    }

    (*line_number)++;

    /* the new line starts one character past the end of the previous one;
     * *line_length still holds the length of that previous line here */
    *char_number += *line_length + 1;

    *line_length = (int) strlen(line);
    return 1;
}

/*
 * ASM_tag_offset() returns the file offset of the first character of token,
 * given the file offset of the start of the line (line_start), the line
 * itself and the position in the line just past the token (token_end).
 */
static long ASM_tag_offset(long line_start, const char *line,
                           const char *token_end, const char *token)
{
    /* token_end never lies before line, so the difference needs no abs() */
    return line_start + (long) (token_end - line) - (long) strlen(token);
}

void ASMTags(FILE * infile, char *infname, FILE * outfile, Flags * flags)
{
    State state = Discard;      /* the current state of the parser */
    char line[TOKEN_LINE_LENGTH];       /* the current input line */
    char cur_token[TOKEN_LINE_LENGTH];  /* the current token */
    char prev_token[TOKEN_LINE_LENGTH]; /* the previous token */
    char *lptr = (char *) NULL; /* pointer into line for token parser */
    char *prev_lptr = (char *) NULL;    /* position in line just past the
                                         * previous token */
    long int line_number = 0;   /* the current line in the file */
    int line_length = 0;        /* the length of the current line */
    long int char_number = -1;  /* file offset of the start of the current
                                 * line; -1 plus the 0 + 1 added for the
                                 * first line puts that line at offset 0 */
    int symbol_index = 0;       /* the index into the token list of the
                                 * symbol */
    char delimiter;             /* the character closing a comment block */

    /* init the engine */
    ASMParserInit();
    cur_token[0] = '\0';
    prev_token[0] = '\0';

    for (;;) {
        switch (state) {

        case Discard:           /* done with the current line, get another */
            if (ASM_next_line(infile, line, &line_number,
                              &line_length, &char_number)) {
                lptr = line;
                state = Parse1;
            }
            else {
                /* no more input */
                state = Exit;
            }
            break;

        case Parse1:            /* parsing for first *special* token */
            if (!ASM_get_token(&lptr, cur_token)) {
                /* no token left or a comment as first non white space
                 * char in remainder of line */
                state = Discard;
                break;
            }

            /* remember the token, it is the tag if a keyword follows it */
            strcpy(prev_token, cur_token);

            /* check for membership in the tagging symbol club */
            if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
                state = Symbol1;
                break;
            }

            if (!ASMIsMember(ASM_comment_block, cur_token, &symbol_index)) {
                /* nothing special, parse the next symbol */
                state = Parse2;
                break;
            }

            /* a comment block: the next non white space character is its
             * delimiter.  This assumes the delimiter is on the same line as
             * the comment symbol; if it is not, parsing simply continues
             * with the next line. */
            while (IsWhite(*lptr)) {
                lptr++;
            }
            if (*lptr) {
                delimiter = *lptr++;

                /* move over the comment block, keeping the line counters
                 * up to date as we go */
                while (*lptr != delimiter) {
                    if (*lptr) {
                        lptr++;
                    }
                    else if (ASM_next_line(infile, line, &line_number,
                                           &line_length, &char_number)) {
                        lptr = line;
                    }
                    else {
                        /* input ended inside the block; the Discard state
                         * will see the end of input again and exit */
                        break;
                    }
                }
            }
            state = Discard;
            break;

        case Parse2:            /* parsing for second *special* token */
            /* save the position just past the first token */
            prev_lptr = lptr;

            if (!ASM_get_token(&lptr, cur_token)) {
                /* no token left, reset machine */
                state = Discard;
            }
            else if (ASMIsMember(ASM_sym, cur_token, &symbol_index)) {
                /* found a major symbol */
                state = Symbol2;
            }
            else if (ASMIsMember(ASM_def, cur_token, &symbol_index)) {
                /* found a defining token */
                state = Define;
            }
            else {
                state = Discard;
            }
            break;

        case Symbol1:           /* the token after the keyword is the tag,
                                 * ignore the line if there is none */
            if (ASM_get_token(&lptr, cur_token) &&
                ASMSymbolWanted(flags, symbol_index)) {
                OutputTag(outfile, line, cur_token, infname, line_number,
                          ASM_tag_offset(char_number, line, lptr,
                                         cur_token),
                          flags);
            }
            /* reset machine */
            state = Discard;
            break;

        case Symbol2:           /* the token before the keyword is the tag */
            if (ASMSymbolWanted(flags, symbol_index)) {
                OutputTag(outfile, line, prev_token, infname, line_number,
                          ASM_tag_offset(char_number, line, prev_lptr,
                                         prev_token),
                          flags);
            }
            /* reset machine */
            state = Discard;
            break;

        case Define:            /* the token before the defining token is
                                 * the tag */
            /* index 1 of ASM_def is ":", which is governed by the label
             * flag; every other defining token by the define flag */
            if ((flags->ad && symbol_index != 1) ||
                (flags->al && symbol_index == 1)) {
                OutputTag(outfile, line, prev_token, infname, line_number,
                          ASM_tag_offset(char_number, line, prev_lptr,
                                         prev_token),
                          flags);
            }
            /* reset machine */
            state = Discard;
            break;

        case Exit:              /* clean it up */
            return;

        default:                /* not reached; return rather than loop
                                 * forever on a corrupt state */
            return;
        }
    }
}